Data Exploration

The (interactive) correlation heatmap reveals very high correlation among TG compounds.

load("./data.Rdata")
X <- df[df$Diagnosis == "Probable AD", 1:230]
C <- round(cor(X), 2)

heatmaply_cor(C, color = viridis, plot_method = "plotly", dendrogram = F, reorderfun = sort.default(d,w), main = "Correlation Heatmap", file = "heatmap.html", colorbar_thickness = 15, colorbar_len = 0.5)

RQi: Differential expression of metabolites per ApoE genotype

Global test

Performing a Global Test on the serum metabolites of AD patients, correcting for sex (Ho: E4 status has no effect on metabolite levels, Ha: it has an effect), yields a significant difference (p = 0.046) between E4 carriers and non-carriers. Testing if the counts of E4 alleles have an effect on metabolite levels showed no significant nuance.

load("data.Rdata")
gdf <- subset(df, Diagnosis == "Probable AD")
gdf$sex <- as.numeric(gdf$sex) -1
# Binary outcome
gt.b <- globaltest::gt(E4 ~ 1, E4 ~ . - APOE - Diagnosis, data = gdf)
gt.b
  p-value Statistic Expected Std.dev #Cov
1  0.0467      2.02    0.794   0.611  231
# Multinomial outcome
gdf$APOE <- as.factor(gdf$APOE)
gt.m <- globaltest::gt(APOE ~ 1, APOE ~ . - E4 -Diagnosis, data = gdf)
gt.m
  p-value Statistic Expected Std.dev #Cov
1   0.135      1.25      0.8   0.443  231

Wilcoxon rank-sum test

Triglycerides and diglycerides seem to survive FDR control.

load("ADdata.Rdata")
# Store all observations of Class 1 in C1
C1 <- ADmets[geno$APOEb == "E4NO", 1:230]
# Store all observations of Class 2 in C2
C2 <- ADmets[geno$APOEb == "E4YES", 1:230]

# Create a function to perform the Wilcoxon rank-sum (Mann-Whitney U) test on two vectors
MannWhitneyU <- function(x, y) {
  wilcoxon <- wilcox.test(x, y, paired = FALSE, alternative = "less")
  return(wilcoxon$p.value)
}

# Use purrr::map2 to apply the function to corresponding columns
wilcoxons <- purrr::map2(C1[, 1:230], C2[, 1:230], ~ MannWhitneyU(.x, .y))

# Coerce p_values to dataframe and transpose it
p_values <- t(data.frame(wilcoxons))
# Calculate the FDR-adjusted p-values
p_adj <- p.adjust(wilcoxons, method = "fdr")

results <- data.frame(p_values, p_adj)
# Filter out the non-significant (a=0.05) FDR-adjusted p-values
dplyr::filter(results, p_adj < 0.05)
                 p_values        p_adj
Lip.TG.52.2. 6.168152e-04 0.0236445812
Lip.TG.52.3. 1.357772e-04 0.0156143833
Lip.TG.52.4. 4.513399e-04 0.0236445812
Lip.TG.54.3. 1.723621e-03 0.0312827654
Lip.TG.54.4. 1.187039e-03 0.0277759895
Lip.TG.54.5. 4.598021e-04 0.0236445812
Lip.TG.54.6. 7.933971e-04 0.0240590581
Lip.TG.56.2. 1.904168e-03 0.0312827654
Lip.TG.56.3. 1.904168e-03 0.0312827654
Lip.TG.56.6. 5.528916e-04 0.0236445812
Lip.TG.56.7. 8.368368e-04 0.0240590581
Lip.TG.56.8. 1.207652e-03 0.0277759895
Lip.TG.58.9. 1.432553e-03 0.0299533905
Lip.DG.36.3. 3.523873e-06 0.0008104908

Carriers of one copy of E4 vs two copies tend to have less histamine, fumaric acid, uracil, triglyceride TG.48.0, phosphatidylcholine PC.36.4, with p < 0.05, however these don’t survive FDRcut at 0.95 (p_adj = 0.97)’

geno$g <- geno$APOE
geno$g[geno$g == "E3E4"] <- geno$g[geno$g == "E2E4"] <- "E4x1"
geno$g[geno$g == "E4E4"] <- "E4x2"
# Store all observations of Class 1 in C1
C1 <- ADmets[geno$g == "E4x1", 1:230]
# Store all observations of Class 2 in C2
C2 <- ADmets[geno$g == "E4x2", 1:230]

# Use purrr::map2 to apply the function to corresponding columns
wilcoxons <- purrr::map2(C1[, 1:230], C2[, 1:230], ~ MannWhitneyU(.x, .y))

# Coerce p_values to dataframe and transpose it
p_values <- t(data.frame(wilcoxons))
# Calculate the FDR-adjusted p-values
p_adj <- p.adjust(wilcoxons, method = "fdr")

results <- data.frame(p_values, p_adj)
# Filter out the non-significant (a=0.05) FDR-adjusted p-values
dplyr::filter(results, p_adj < 0.05)
[1] p_values p_adj   
<0 rows> (or 0-length row.names)

Testing no E4 vs one E4, no metabolites differ significantly.

geno$g <- geno$APOE
geno$g[geno$g == "E3E4"] <- geno$g[geno$g == "E2E4"] <- "E4x1"
geno$g[geno$APOEb == "E4NO"] <- "E4x0"
# Store all observations of Class 1 in C1
C1 <- ADmets[geno$g == "E4x1", 1:230]
# Store all observations of Class 2 in C2
C2 <- ADmets[geno$g == "E4x0", 1:230]

# Use purrr::map2 to apply the function to corresponding columns
wilcoxons <- purrr::map2(C1[, 1:230], C2[, 1:230], ~ MannWhitneyU(.x, .y))

# Coerce p_values to dataframe and transpose it
p_values <- t(data.frame(wilcoxons))
# Calculate the FDR-adjusted p-values
p_adj <- p.adjust(wilcoxons, method = "fdr")

results <- data.frame(p_values, p_adj)
# Filter out the non-significant (a=0.05) FDR-adjusted p-values
dplyr::filter(results, p_adj < 0.05)
[1] p_values p_adj   
<0 rows> (or 0-length row.names)

RQii - Classification of metabolites on ApoE class

The models fitted for the binary and the multinomial classification of metabolites didn’t seem to be able to discriminate well between the presence or absence of the E4 allele, nor the number of copies in AD. ### Function fit

fit <- function(title,
                X,
                y,
                model,
                ctrl = NULL,
                grid = NULL,
                seed = 123, ...) {
  set.seed(seed)
  # Merge X and y into df
  df <- merge.data.frame(X, y)

  # Train the model
  mdl <- caret::train(df[, 1:ncol(X)], df$y,
    method = model,
    tuneGrid = grid,
    trControl = ctrl,
    metric = "ROC",
    preProcess = c("center", "scale"),
    ...
  )
  # Create a confusion matrix and get performance metrics from caret
  obs <- mdl$pred$obs
  preds <- mdl$pred$pred
  cm <- confusionMatrix(obs, preds,
    dnn = c("X0", "X1"), # nolint
    positive = "X1")
  # Predictions
  ys <- as.numeric(obs) -1
  yhats <- mdl$pred$X1
  roc <- roc(ys, yhats,
    levels = c(0, 1),
    ci = TRUE, boot.n = 1000, ci.alpha = 0.95)
  metrics <- data.frame(c(cm$byClass, roc$auc),
    row.names = c(names(cm$byClass), "AUC")
  )
  names(metrics) <- title
  out <- list("metrics" = metrics, "roc" = roc, "model" = mdl)
  return(out)
}

Using 230 metabolites in AD

Binary classification of outcome 0 vs at least 1 E4 allele

load("./data.Rdata")
X <- df[df$Diagnosis == "Probable AD", 1:230]
y <- df[df$Diagnosis == "Probable AD", "E4"]
y <- as.factor(y)
levels(y) <- make.names(levels(y))

# Define the model training parameters, repeated 10-fold cross-valuidation
ctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 50,
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  savePredictions = TRUE,
  sampling = 'smote',
)
Logistic Regression
lr <- fit(
  title = "Logistic Regression",
  X = X,
  y = y,
  model = "glmnet",
  ctrl = ctrl,
  grid = expand.grid(alpha = c(0), lambda = c(100)))
Loading required package: recipes

Attaching package: 'recipes'
The following object is masked from 'package:Matrix':

    update
The following object is masked from 'package:stats':

    step
Setting direction: controls < cases
Decision Tree
tree <- fit(
  title = "Decision Tree",
  X = X,
  y = y,
  model = "rpart2",
  ctrl = ctrl,
  grid = expand.grid(maxdepth = c(2))
)
Setting direction: controls < cases
tree$metrics
                     Decision Tree
Sensitivity             0.64646876
Specificity             0.30851052
Pos Pred Value          0.13520319
Neg Pred Value          0.83918504
Precision               0.13520319
Recall                  0.64646876
F1                      0.22363508
Prevalence              0.14326989
Detection Rate          0.09261951
Detection Prevalence    0.68503937
Balanced Accuracy       0.47748964
AUC                     0.47755338
XGBoost Forest
param <- data.frame(nrounds = c(10), max_depth = c(2), eta = c(0.3), gamma = c(0), colsample_bytree = c(0.5), min_child_weight = c(1), subsample = c(1))
rf <- fit(title = "XGBoost",
  X = X,
  y = y,
  model = "xgbTree",
  ctrl = ctrl,
  grid = param
)
Setting direction: controls > cases
rf$metrics
                       XGBoost
Sensitivity          0.6034167
Specificity          0.2373725
Pos Pred Value       0.4292642
Neg Pred Value       0.3863780
Precision            0.4292642
Recall               0.6034167
F1                   0.5016558
Prevalence           0.4873297
Detection Rate       0.2940629
Detection Prevalence 0.6850394
Balanced Accuracy    0.4203946
AUC                  0.6297622
#Get a performance metrics table
metrics <- cbind(lr$metrics, tree$metrics, rf$metrics)
metrics
                     Logistic Regression Decision Tree   XGBoost
Sensitivity                    0.6850589    0.64646876 0.6034167
Specificity                    0.3151365    0.30851052 0.2373725
Pos Pred Value                 0.9000815    0.13520319 0.4292642
Neg Pred Value                 0.1000000    0.83918504 0.3863780
Precision                      0.9000815    0.13520319 0.4292642
Recall                         0.6850589    0.64646876 0.6034167
F1                             0.7779864    0.22363508 0.5016558
Prevalence                     0.9000558    0.14326989 0.4873297
Detection Rate                 0.6165912    0.09261951 0.2940629
Detection Prevalence           0.6850394    0.68503937 0.6850394
Balanced Accuracy              0.5000977    0.47748964 0.4203946
AUC                            0.5000407    0.47755338 0.6297622
#Plot ROC curves
rocs <- list(lr$roc, tree$roc, rf$roc)
# Generate labels
labels <- paste0(names(metrics), ", AUC = ", paste(round(metrics[12, ], 2)))
# httpgd::hgd()
ggroc(rocs) +
  theme_clean() +
  scale_color_tableau(labels = labels)

Multinomial Classification of number of ApoE E4 alleles based on metabolites

multifit <- function(
                X,
                y,
                model,
                ctrl = NULL,
                grid = NULL,
                seed = 123, ...) {
  set.seed(seed)
  # Merge X and y into df
  df <- merge.data.frame(X, y)

  # Train the model
  mdl <- caret::train(df[, 1:ncol(X)], df$y,
    method = model,
    tuneGrid = grid,
    trControl = ctrl,
    metric = "logLoss",
    ...
  )
  # Create a confusion matrix and get performance metrics from caret
  obs <- mdl$pred$obs
  preds <- mdl$pred$pred
  cm <- confusionMatrix(obs, preds)
  # Predictions
  ys <- as.numeric(obs) -1
  yhats <- as.numeric(preds) -1
  roc <- multiclass.roc(ys, yhats)
  out <- list("cm" = cm, "roc" = roc, "model" = mdl)
  return(out)
}
X <- df[df$Diagnosis == "Probable AD", 1:230]
y <- df[df$Diagnosis == "Probable AD", "APOE"]
y <- as.factor(y)
levels(y) <- make.names(levels(y))

ctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 50,
  classProbs = TRUE,
  summaryFunction = multiClassSummary,
  savePredictions = TRUE,
  sampling = "up"
)
Multinomial Logistic Regression
mlr <- multifit(
  X = X,
  y = y,
  model = "multinom",
  ctrl = ctrl,
  trace = FALSE,
  tuneLength = 1
)
Setting direction: controls < cases
Setting direction: controls < cases
Setting direction: controls < cases
mlr$cm
Confusion Matrix and Statistics

          Reference
Prediction     X0     X1     X2
        X0  66858  83103 104039
        X1 142750  87747 156853
        X2  63114  56273  45713

Overall Statistics
                                          
               Accuracy : 0.2484          
                 95% CI : (0.2475, 0.2493)
    No Information Rate : 0.3802          
    P-Value [Acc > NIR] : 1               
                                          
                  Kappa : -0.1047         
                                          
 Mcnemar's Test P-Value : <2e-16          

Statistics by Class:

                     Class: X0 Class: X1 Class: X2
Sensitivity             0.2452    0.3863   0.14909
Specificity             0.6494    0.4828   0.76115
Pos Pred Value          0.2632    0.2265   0.27688
Neg Pred Value          0.6274    0.6674   0.59321
Prevalence              0.3382    0.2816   0.38019
Detection Rate          0.0829    0.1088   0.05668
Detection Prevalence    0.3150    0.4803   0.20472
Balanced Accuracy       0.4473    0.4346   0.45512
Decision Tree
mtree <- multifit(
  X = X,
  y = y,
  model = "rpart2",
  ctrl = ctrl,
  grid = expand.grid(maxdepth=c(2))
)
Setting direction: controls < cases
Setting direction: controls < cases
Setting direction: controls < cases
mtree$cm
Confusion Matrix and Statistics

          Reference
Prediction     X0     X1     X2
        X0 190866   3130  60004
        X1 294793   4503  88054
        X2 128394   2283  34423

Overall Statistics
                                         
               Accuracy : 0.2849         
                 95% CI : (0.284, 0.2859)
    No Information Rate : 0.7614         
    P-Value [Acc > NIR] : 1              
                                         
                  Kappa : -0.01          
                                         
 Mcnemar's Test P-Value : <2e-16         

Statistics by Class:

                     Class: X0 Class: X1 Class: X2
Sensitivity             0.3108  0.454115   0.18864
Specificity             0.6719  0.519359   0.79057
Pos Pred Value          0.7514  0.011625   0.20850
Neg Pred Value          0.2340  0.987084   0.76915
Prevalence              0.7614  0.012296   0.22628
Detection Rate          0.2367  0.005584   0.04268
Detection Prevalence    0.3150  0.480315   0.20472
Balanced Accuracy       0.4913  0.486737   0.48961
XGBoost Forest
mrf <- multifit(
  X = X,
  y = y,
  model = "xgbTree",
  ctrl = ctrl,
  grid = param
)
Setting direction: controls < cases
Setting direction: controls < cases
Setting direction: controls < cases
mrf$cm
Confusion Matrix and Statistics

          Reference
Prediction     X0     X1     X2
        X0  67982  83995 102023
        X1 140453  92186 154711
        X2  61848  56383  46869

Overall Statistics
                                          
               Accuracy : 0.2567          
                 95% CI : (0.2558, 0.2577)
    No Information Rate : 0.3765          
    P-Value [Acc > NIR] : 1               
                                          
                  Kappa : -0.0949         
                                          
 Mcnemar's Test P-Value : <2e-16          

Statistics by Class:

                     Class: X0 Class: X1 Class: X2
Sensitivity             0.2515    0.3964   0.15438
Specificity             0.6531    0.4857   0.76488
Pos Pred Value          0.2676    0.2380   0.28388
Neg Pred Value          0.6338    0.6650   0.59970
Prevalence              0.3352    0.2884   0.37647
Detection Rate          0.0843    0.1143   0.05812
Detection Prevalence    0.3150    0.4803   0.20472
Balanced Accuracy       0.4523    0.4410   0.45963
#Get a performance metrics table
multimetrics <- cbind(mlr$cm$byClass[3,], mtree$cm$byClass[3,], mrf$cm$byClass[3,])
multimetrics
                           [,1]       [,2]       [,3]
Sensitivity          0.14909411 0.18863882 0.15437594
Specificity          0.76115196 0.79057133 0.76487679
Pos Pred Value       0.27688068 0.20849788 0.28388250
Neg Pred Value       0.59321431 0.76914633 0.59969751
Precision            0.27688068 0.20849788 0.28388250
Recall               0.14909411 0.18863882 0.15437594
F1                   0.19382029 0.19807182 0.19999445
Prevalence           0.38019096 0.22627689 0.37646847
Detection Rate       0.05668423 0.04268461 0.05811768
Detection Prevalence 0.20472441 0.20472441 0.20472441
Balanced Accuracy    0.45512303 0.48960507 0.45962637
#Plot ROC curves
mrocs <- list(mlr$roc, mtree$roc, mrf$roc)
# Generate labels
labels <- paste0(names(metrics), ", AUC = ", paste(round(metrics[12, ], 2)))

Projection to Latent Factors

load("thomson.Rdata")
X <- thomson
y <- df[df$Diagnosis == "Probable AD", "E4"]
y <- as.factor(y)
levels(y) <- make.names(levels(y))

Binary classification of outcome 0 vs at least 1 E4 allele

Logistic Regression
ctrl$summaryFunction <- twoClassSummary
lrf <- fit(title = "Logistic Regression",
  X = X,
  y = y,
  model = "glm",
  ctrl = ctrl,
)
Setting direction: controls > cases
Decision Tree
treef <- fit(
  title = "Decision Tree",
  X = X,
  y = y,
  model = "rpart2",
  ctrl = ctrl,
  grid = expand.grid(maxdepth = c(2))
)
Setting direction: controls < cases
XGBoost Forest
param <- data.frame(nrounds = c(10), max_depth = c(2), eta = c(0.3), gamma = c(0), colsample_bytree = 1, min_child_weight = c(1), subsample = c(1))
rff <- fit(title = "XGBoost",
  X = X,
  y = y,
  model = "xgbTree",
  ctrl = ctrl,
  grid = param
)
Setting direction: controls > cases
Model Comparison
#Get a performance metrics table
metricsf <- cbind(lrf$metrics, treef$metrics, rff$metrics)
metricsf
                     Logistic Regression Decision Tree   XGBoost
Sensitivity                    0.6731920     0.6668807 0.6437766
Specificity                    0.3029211     0.3103424 0.2725787
Pos Pred Value                 0.4953046     0.1973844 0.4761698
Neg Pred Value                 0.4770197     0.7855512 0.4269291
Precision                      0.4953046     0.1973844 0.4761698
Recall                         0.6731920     0.6668807 0.6437766
F1                             0.5707078     0.3046099 0.5474315
Prevalence                     0.5040213     0.2027590 0.5066898
Detection Rate                 0.3393031     0.1352161 0.3261951
Detection Prevalence           0.6850394     0.6850394 0.6850394
Balanced Accuracy              0.4880566     0.4886116 0.4581777
AUC                            0.5204983     0.4854656 0.5701351
#Plot ROC curves
rocsf <- list(lrf$roc, treef$roc, rff$roc)
# Generate labels
labelsf <- paste0(names(metricsf), ", AUC = ", paste(round(metricsf[12, ], 2)))
# httpgd::hgd()
ggroc(rocsf) +
  theme_clean() +
  scale_color_tableau(labels = labels)

Multinomial Classification of number of ApoE E4 alleles based on latent factors

Multinomial Logistic Regression
y <- df[df$Diagnosis == "Probable AD", "APOE"]
y <- as.factor(y)
levels(y) <- make.names(levels(y))

ctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 10,
  classProbs = TRUE,
  summaryFunction = multiClassSummary,
  savePredictions = TRUE,
  sampling = "up"
)

mlrf <- multifit(
  X = X,
  y = y,
  model = "multinom",
  ctrl = ctrl,
  trace = FALSE,
  tuneLength = 1
)
Setting direction: controls < cases
Setting direction: controls < cases
Setting direction: controls < cases
mlrf$cm
Confusion Matrix and Statistics

          Reference
Prediction    X0    X1    X2
        X0 16591 14804 19405
        X1 27083 20639 29748
        X2 11715  9483 11822

Overall Statistics
                                          
               Accuracy : 0.3041          
                 95% CI : (0.3019, 0.3064)
    No Information Rate : 0.378           
    P-Value [Acc > NIR] : 1               
                                          
                  Kappa : -0.0224         
                                          
 Mcnemar's Test P-Value : <2e-16          

Statistics by Class:

                     Class: X0 Class: X1 Class: X2
Sensitivity             0.2995    0.4594    0.1939
Specificity             0.6770    0.5116    0.7887
Pos Pred Value          0.3266    0.2664    0.3580
Neg Pred Value          0.6489    0.7102    0.6168
Prevalence              0.3434    0.2785    0.3780
Detection Rate          0.1029    0.1280    0.0733
Detection Prevalence    0.3150    0.4803    0.2047
Balanced Accuracy       0.4883    0.4855    0.4913
Decision Tree
mtreef <- multifit(
  X = X,
  y = y,
  model = "rpart2",
  ctrl = ctrl,
  grid = mtree$model$bestTune
)
Setting direction: controls < cases
Setting direction: controls < cases
Setting direction: controls < cases
mtreef$cm
Confusion Matrix and Statistics

          Reference
Prediction    X0    X1    X2
        X0 47322   535  2943
        X1 72308   746  4416
        X2 30949   381  1690

Overall Statistics
                                          
               Accuracy : 0.3085          
                 95% CI : (0.3062, 0.3108)
    No Information Rate : 0.9336          
    P-Value [Acc > NIR] : 1               
                                          
                  Kappa : -0.0029         
                                          
 Mcnemar's Test P-Value : <2e-16          

Statistics by Class:

                     Class: X0 Class: X1 Class: X2
Sensitivity            0.31427  0.448857   0.18676
Specificity            0.67529  0.519358   0.79421
Pos Pred Value         0.93154  0.009630   0.05118
Neg Pred Value         0.06546  0.989072   0.94263
Prevalence             0.93359  0.010304   0.05610
Detection Rate         0.29340  0.004625   0.01048
Detection Prevalence   0.31496  0.480315   0.20472
Balanced Accuracy      0.49478  0.484107   0.49048
XGBoost Forest
mrff <- multifit(
  X = X,
  y = y,
  model = "xgbTree",
  ctrl = ctrl,
  grid = param
)
Setting direction: controls < cases
Setting direction: controls < cases
Setting direction: controls < cases
mrff$cm
Confusion Matrix and Statistics

          Reference
Prediction    X0    X1    X2
        X0 14192 16860 19748
        X1 27594 20049 29827
        X2 12116 11091  9813

Overall Statistics
                                         
               Accuracy : 0.2731         
                 95% CI : (0.271, 0.2753)
    No Information Rate : 0.3682         
    P-Value [Acc > NIR] : 1              
                                         
                  Kappa : -0.0746        
                                         
 Mcnemar's Test P-Value : <2e-16         

Statistics by Class:

                     Class: X0 Class: X1 Class: X2
Sensitivity            0.26329    0.4177   0.16524
Specificity            0.65911    0.4932   0.77226
Pos Pred Value         0.27937    0.2588   0.29718
Neg Pred Value         0.64060    0.6665   0.61351
Prevalence             0.33419    0.2976   0.36821
Detection Rate         0.08799    0.1243   0.06084
Detection Prevalence   0.31496    0.4803   0.20472
Balanced Accuracy      0.46120    0.4554   0.46875
# Get a performance metrics table
multimetricsf <- cbind(mlrf$cm$byClass[3, ], mtreef$cm$byClass[3, ], mrff$cm$byClass[3, ])
names(multimetricsf) <- names(metrics)
multimetricsf
                           [,1]       [,2]       [,3]
Sensitivity          0.19388274 0.18676097 0.16523540
Specificity          0.78868564 0.79420787 0.77226158
Pos Pred Value       0.35802544 0.05118110 0.29718353
Neg Pred Value       0.61680050 0.94262883 0.61351056
Precision            0.35802544 0.05118110 0.29718353
Recall               0.19388274 0.18676097 0.16523540
F1                   0.25154529 0.08034420 0.21238421
Prevalence           0.37804576 0.05610391 0.36820634
Detection Rate       0.07329655 0.01047802 0.06084072
Detection Prevalence 0.20472441 0.20472441 0.20472441
Balanced Accuracy    0.49128419 0.49048442 0.46874849
attr(,"names")
 [1] "Logistic Regression" "Decision Tree"       "XGBoost"            
 [4] NA                    NA                    NA                   
 [7] NA                    NA                    NA                   
[10] NA                    NA                    NA                   
[13] NA                    NA                    NA                   
[16] NA                    NA                    NA                   
[19] NA                    NA                    NA                   
[22] NA                    NA                    NA                   
[25] NA                    NA                    NA                   
[28] NA                    NA                    NA                   
[31] NA                    NA                    NA